In [1]:
import joblib
# Load the persisted model object that is later used via clf.predict().
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
clf = joblib.load(filename='decisiontree.p')
In [2]:
import numpy as np
import pandas as pd
from time import time
from IPython.display import display # Allows the use of display() for DataFrames
# Import supplementary visualization code visuals.py
import visuals as vs
# Pretty display for notebooks
%matplotlib inline
In [3]:
# Read the batch of loans to be scored from the CSV next to the notebook.
newloans = pd.read_csv(filepath_or_buffer='new_loans.csv')
In [4]:
# Show the freshly loaded frame as the cell output (pandas abbreviates the
# rendering of large frames, which is why the output contains "..." rows/columns).
newloans
Out[4]:
id
member_id
loan_amnt
funded_amnt
term
int_rate
exp_default_rate
service_fee_rate
installment
grade
...
sec_app_earliest_cr_line
sec_app_inq_last_6mths
sec_app_mort_acc
sec_app_open_acc
sec_app_revol_util
sec_app_open_il_6m
sec_app_num_rev_accts
sec_app_chargeoff_within_12_mths
sec_app_collections_12_mths_ex_med
sec_app_mths_since_last_major_derog
0
111876788
120425507
2000.0
1650.0
36
7.21
2.06
0.81
61.95
A
...
1
111868402
120417120
9000.0
8650.0
36
11.99
3.99
0.93
298.89
B
...
2
112017019
120565736
8000.0
7125.0
36
14.08
6.59
1.05
273.74
C
...
3
111549780
120064497
7800.0
4875.0
36
21.45
10.49
1.24
295.68
D
...
4
111718954
120270828
12000.0
9100.0
36
16.02
6.59
1.05
422.01
C
...
5
112705560
121335400
13000.0
12025.0
36
18.06
10.49
1.24
470.38
D
...
6
112722043
121351760
4000.0
2825.0
36
7.21
2.06
0.81
123.90
A
...
7
112156293
120758009
6000.0
3550.0
36
19.03
10.49
1.24
220.03
D
...
8
112443397
121071017
4475.0
4325.0
36
15.05
6.59
1.05
155.24
C
...
9
112888880
121535600
9250.0
8925.0
36
13.59
6.59
1.05
314.31
C
...
10
112706451
121336256
4300.0
1500.0
36
15.05
6.59
1.05
149.17
C
...
11
111977459
120526178
7000.0
1400.0
36
10.42
3.99
0.93
227.26
B
...
12
112050937
120603108
23650.0
15750.0
36
7.35
2.06
0.81
734.04
A
...
13
112444385
121070076
7200.0
3875.0
36
16.02
6.59
1.05
253.21
C
...
14
113098522
121777247
12000.0
11700.0
36
9.93
3.99
0.93
386.82
B
...
15
112762391
121409105
5000.0
3975.0
36
20.00
10.49
1.24
185.82
D
...
16
112748987
121395226
3500.0
1200.0
36
14.08
6.59
1.05
119.76
C
...
17
112432825
121040009
10000.0
3225.0
36
13.59
6.59
1.05
339.79
C
...
18
113065122
121716625
8000.0
7325.0
36
16.02
6.59
1.05
281.34
C
...
19
111420996
119935712
39725.0
13100.0
36
23.88
14.69
1.44
1556.03
E
...
20
112708070
121339069
20000.0
14550.0
36
16.02
6.59
1.05
703.34
C
...
21
112957697
121604410
2500.0
650.0
36
10.42
3.99
0.93
81.17
B
...
22
111918090
120466813
16000.0
4750.0
36
11.99
3.99
0.93
531.36
B
...
12-26-2006 16:00:00
0
0
3
63.6
0
4
0
0
23
112872264
121518980
5000.0
4625.0
36
12.62
6.59
1.05
167.56
C
...
24
113086496
121741499
5000.0
4425.0
36
12.62
6.59
1.05
167.56
C
...
25
113185223
121870953
3600.0
3475.0
36
16.02
6.59
1.05
126.61
C
...
26
112711315
121340362
24000.0
17825.0
36
15.05
6.59
1.05
832.56
C
...
27
112788723
121435438
2800.0
1225.0
36
15.05
6.59
1.05
97.14
C
...
28
111684847
120199560
21525.0
10800.0
60
30.75
16.99
1.21
706.36
F
...
29
113074560
121738710
2000.0
1350.0
36
10.91
3.99
0.93
65.40
B
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
958
113489399
122211748
35000.0
5425.0
36
11.99
3.99
0.93
1162.34
B
...
06-13-1994 17:00:00
0
8
19
71.6
0
34
0
0
959
113155790
121838474
35000.0
5350.0
36
22.91
14.69
1.44
1353.20
E
...
960
113177552
121864879
36000.0
6325.0
36
5.32
2.06
0.81
1084.14
A
...
961
113494087
122234936
36000.0
5925.0
36
16.02
6.59
1.05
1266.01
C
...
962
112931290
121578022
35000.0
4850.0
36
11.99
3.99
0.93
1162.34
B
...
963
111096224
119541846
33250.0
2725.0
36
11.99
3.99
0.93
1104.22
B
...
964
111648729
120163442
40000.0
9400.0
36
10.91
3.99
0.93
1307.85
B
...
965
113513994
122255770
32000.0
1350.0
36
9.93
3.99
0.93
1031.50
B
...
966
113231375
121946091
35000.0
4150.0
36
15.05
6.59
1.05
1214.15
C
...
967
113143681
121825398
35000.0
4075.0
36
26.30
14.69
1.44
1415.78
E
...
968
113229319
121944035
35000.0
3900.0
36
7.35
2.06
0.81
1086.31
A
...
969
113127300
121807470
40000.0
8825.0
36
30.79
17.95
1.66
1715.42
G
...
970
113065405
121729999
36000.0
4700.0
36
7.35
2.06
0.81
1117.35
A
...
971
113162997
121848450
35000.0
3250.0
36
23.88
14.69
1.44
1370.95
E
...
11-11-2006 16:00:00
0
4
6
89.1
1
14
0
1
972
113482319
122205378
36200.0
4375.0
36
17.09
10.49
1.24
1292.26
D
...
973
113102596
121782309
35000.0
2950.0
36
11.99
3.99
0.93
1162.34
B
...
974
113233191
121947907
35000.0
2800.0
36
21.45
10.49
1.24
1326.73
D
...
975
113124266
121803274
35000.0
2625.0
36
23.88
14.69
1.44
1370.95
E
...
976
113121884
121803893
37225.0
4800.0
36
11.99
3.99
0.93
1236.23
B
...
05-10-2002 17:00:00
0
1
23
61.5
5
26
0
1
977
113487906
122217362
35000.0
2100.0
36
25.82
14.69
1.44
1406.82
E
...
03-13-2001 16:00:00
0
1
11
69.0
4
14
0
0
978
113098878
121777668
40000.0
6950.0
36
7.21
2.06
0.81
1238.93
A
...
979
113097177
121775888
40000.0
6225.0
36
10.91
3.99
0.93
1307.85
B
...
980
113490815
122219563
40000.0
6200.0
36
22.91
14.69
1.44
1546.52
E
...
981
113096890
121775619
40000.0
5550.0
36
9.93
3.99
0.93
1289.38
B
...
982
110726699
119140533
40000.0
5525.0
36
10.42
3.99
0.93
1298.59
B
...
983
113513208
122252691
38625.0
2100.0
36
7.07
2.06
0.81
1193.87
A
...
984
113067608
121732304
40000.0
2325.0
36
15.05
6.59
1.05
1387.60
C
...
10-08-2006 17:00:00
4
3
22
49.5
0
32
0
0
985
113129406
121808782
40000.0
1500.0
36
10.91
3.99
0.93
1307.85
B
...
986
113496147
122235011
40000.0
1475.0
36
16.02
6.59
1.05
1406.68
C
...
987
113486281
122212572
40000.0
1325.0
36
7.35
2.06
0.81
1241.50
A
...
988 rows × 121 columns
In [5]:
# Keep only credit-card-refinancing / debt-consolidation loans with a 36-month term.
newloans = (
    newloans
    .query('purpose=="Credit card refinancing"|purpose=="Debt consolidation"')
    .query('term==36')
)
In [6]:
# Columns kept in the human-readable result table: the loan identifier and
# pricing columns plus the same borrower attributes that are fed to the model.
finalkeep = [
    'id',
    'int_rate',
    'loan_amnt',
    'installment',
    'emp_length',
    'home_ownership',
    'annual_inc',
    'purpose',
    'dti',
    'delinq_2yrs',
    'earliest_cr_line',
    'inq_last_6mths',
    'open_acc',
    'pub_rec',
    'revol_bal',
    'revol_util',
    'total_acc',
    'collections_12_mths_ex_med',
]
# Independent copy, so the untransformed values survive the preprocessing
# applied to `newloans` in the following cells.
final = newloans.loc[:, finalkeep].copy()
In [7]:
# Columns that are preprocessed and passed to the classifier as features.
keepfeat=['loan_amnt', 'emp_length', 'home_ownership',
'annual_inc','purpose',
'dti', 'delinq_2yrs', 'earliest_cr_line',
'inq_last_6mths','open_acc', 'pub_rec', 'revol_bal',
'revol_util', 'total_acc','collections_12_mths_ex_med']
# Take an explicit copy (as is done for `final`): the following cells assign
# columns on this frame, and assigning into a selection of a filtered frame
# raises SettingWithCopyWarning.
newloans=newloans[keepfeat].copy()
newloans.head(n=10)
Out[7]:
loan_amnt
emp_length
home_ownership
annual_inc
purpose
dti
delinq_2yrs
earliest_cr_line
inq_last_6mths
open_acc
pub_rec
revol_bal
revol_util
total_acc
collections_12_mths_ex_med
0
2000.0
7 years
MORTGAGE
90000.0
Credit card refinancing
23.91
1
06-24-2007 17:00:00
0
10
0
5524.0
32.1
17
0
1
9000.0
9 years
MORTGAGE
65000.0
Debt consolidation
21.01
5
08-25-1991 17:00:00
0
13
0
3685.0
15.2
39
0
3
7800.0
n/a
RENT
29135.0
Debt consolidation
29.38
0
09-18-1999 17:00:00
0
5
0
675.0
18.8
44
0
5
13000.0
3 years
RENT
46000.0
Credit card refinancing
17.01
0
10-29-2013 17:00:00
0
5
0
12466.0
79.9
7
0
7
6000.0
< 1 year
RENT
30000.0
Debt consolidation
10.92
2
07-29-2004 17:00:00
0
7
0
3247.0
44.5
16
0
8
4475.0
9 years
RENT
35000.0
Credit card refinancing
17.97
0
11-07-2013 16:00:00
1
7
0
3929.0
43.7
9
0
9
9250.0
n/a
OWN
48000.0
Debt consolidation
31.12
3
12-07-1993 16:00:00
2
17
0
11082.0
34.2
34
0
10
4300.0
n/a
RENT
12000.0
Credit card refinancing
14.90
0
07-29-2000 17:00:00
1
8
1
3763.0
54.5
24
0
11
7000.0
n/a
RENT
24708.0
Debt consolidation
10.20
0
12-25-1997 16:00:00
0
5
1
6497.0
52.4
9
0
12
23650.0
10+ years
MORTGAGE
529000.0
Credit card refinancing
21.67
0
03-23-1997 16:00:00
0
22
0
312921.0
49.0
38
0
In [8]:
# Parse the "MM-DD-YYYY HH:MM:SS" timestamp and keep only the calendar year
# of the borrower's earliest credit line.
newloans['earliest_cr_line'] = pd.to_datetime(
    newloans['earliest_cr_line'],
    format='%m-%d-%Y %H:%M:%S',
).dt.year
newloans['earliest_cr_line'].head(10)
Out[8]:
0 2007
1 1991
3 1999
5 2013
7 2004
8 2013
9 1993
10 2000
11 1997
12 1997
Name: earliest_cr_line, dtype: int64
In [9]:
# Turn the free-text employment length ("7 years", "10+ years", "< 1 year",
# "n/a") into a number of years: "10+" -> 10, "< 1" -> 0.5, "n/a" -> 0.
# The replacements are applied in order and are order-dependent.
emp_length_text = newloans['emp_length']
for old_text, new_text in [
    (' year', ''),
    ('s', ''),
    ('+', ''),
    ('< 1', '0.5'),
    ('n/a', '0'),
]:
    # regex=False: every pattern is a literal. '+' is a regex metacharacter and
    # the default of `regex` differs between pandas versions.
    emp_length_text = emp_length_text.str.replace(old_text, new_text, regex=False)
# Vectorised conversion of the whole column instead of an element-wise apply.
newloans['emp_length'] = pd.to_numeric(emp_length_text)
newloans['emp_length'].dtype
Out[9]:
dtype('float64')
In [10]:
# Show the frame after the earliest_cr_line / emp_length conversions
# (pandas abbreviates the rendering of large frames).
newloans
Out[10]:
loan_amnt
emp_length
home_ownership
annual_inc
purpose
dti
delinq_2yrs
earliest_cr_line
inq_last_6mths
open_acc
pub_rec
revol_bal
revol_util
total_acc
collections_12_mths_ex_med
0
2000.0
7.0
MORTGAGE
90000.0
Credit card refinancing
23.91
1
2007
0
10
0
5524.0
32.1
17
0
1
9000.0
9.0
MORTGAGE
65000.0
Debt consolidation
21.01
5
1991
0
13
0
3685.0
15.2
39
0
3
7800.0
0.0
RENT
29135.0
Debt consolidation
29.38
0
1999
0
5
0
675.0
18.8
44
0
5
13000.0
3.0
RENT
46000.0
Credit card refinancing
17.01
0
2013
0
5
0
12466.0
79.9
7
0
7
6000.0
0.5
RENT
30000.0
Debt consolidation
10.92
2
2004
0
7
0
3247.0
44.5
16
0
8
4475.0
9.0
RENT
35000.0
Credit card refinancing
17.97
0
2013
1
7
0
3929.0
43.7
9
0
9
9250.0
0.0
OWN
48000.0
Debt consolidation
31.12
3
1993
2
17
0
11082.0
34.2
34
0
10
4300.0
0.0
RENT
12000.0
Credit card refinancing
14.90
0
2000
1
8
1
3763.0
54.5
24
0
11
7000.0
0.0
RENT
24708.0
Debt consolidation
10.20
0
1997
0
5
1
6497.0
52.4
9
0
12
23650.0
10.0
MORTGAGE
529000.0
Credit card refinancing
21.67
0
1997
0
22
0
312921.0
49.0
38
0
15
5000.0
5.0
RENT
92000.0
Debt consolidation
12.34
0
2003
0
4
1
447.0
49.7
7
0
16
3500.0
0.5
RENT
45000.0
Debt consolidation
7.97
0
2002
2
15
0
5093.0
21.0
29
0
17
10000.0
0.5
MORTGAGE
65000.0
Debt consolidation
15.66
1
1998
0
9
1
10630.0
54.2
21
0
18
8000.0
0.5
RENT
33000.0
Debt consolidation
11.56
0
1998
0
15
1
5079.0
22.9
31
0
19
39725.0
10.0
MORTGAGE
120000.0
Debt consolidation
31.39
0
1988
1
18
0
31750.0
56.5
35
0
20
20000.0
3.0
MORTGAGE
75000.0
Debt consolidation
16.93
0
2013
0
7
0
990.0
21.1
14
0
22
16000.0
9.0
RENT
25000.0
Debt consolidation
26.98
0
2007
0
6
1
19324.0
70.5
11
0
26
24000.0
5.0
RENT
185000.0
Debt consolidation
8.63
0
2011
0
7
0
11773.0
21.8
7
0
29
2000.0
8.0
OWN
30000.0
Credit card refinancing
32.12
3
2005
0
13
0
13513.0
80.4
21
0
30
15000.0
0.0
MORTGAGE
40000.0
Debt consolidation
29.61
0
1975
0
7
0
4289.0
32.0
14
0
32
10000.0
10.0
MORTGAGE
55000.0
Debt consolidation
10.58
0
2013
1
9
0
4892.0
74.1
12
0
34
15000.0
0.5
RENT
50000.0
Credit card refinancing
27.53
0
2011
0
15
0
12938.0
55.8
21
0
37
16000.0
0.5
RENT
30000.0
Debt consolidation
8.36
0
2013
0
5
0
9983.0
28.9
5
0
41
6000.0
0.5
MORTGAGE
21256.0
Debt consolidation
27.77
0
2001
0
12
0
11511.0
66.2
16
0
42
5050.0
8.0
RENT
25000.0
Debt consolidation
6.05
0
2004
0
2
0
4258.0
28.8
6
0
43
10000.0
0.0
MORTGAGE
50000.0
Credit card refinancing
30.72
0
1998
0
14
0
12504.0
38.7
22
0
44
6000.0
0.5
MORTGAGE
40000.0
Credit card refinancing
48.09
0
1993
0
12
0
25263.0
79.7
25
0
46
15000.0
10.0
MORTGAGE
85000.0
Credit card refinancing
29.37
0
1993
1
14
0
11752.0
74.0
15
0
48
13000.0
2.0
OWN
15000.0
Debt consolidation
50.80
0
2010
1
12
0
7031.0
46.6
17
0
49
10000.0
10.0
OWN
80600.0
Debt consolidation
31.21
0
1989
1
14
0
34924.0
76.3
56
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
950
35000.0
10.0
MORTGAGE
205000.0
Debt consolidation
10.30
1
2003
0
10
0
35717.0
82.5
36
0
951
36000.0
0.5
MORTGAGE
124000.0
Debt consolidation
1.81
0
2002
0
11
0
6438.0
14.8
16
0
952
32000.0
10.0
MORTGAGE
220000.0
Credit card refinancing
24.79
0
1992
0
28
0
142412.0
27.9
42
0
955
32000.0
2.0
MORTGAGE
110000.0
Credit card refinancing
22.02
0
1999
1
16
0
22172.0
48.5
28
0
956
30000.0
0.5
RENT
48000.0
Credit card refinancing
36.53
0
2006
0
7
0
5113.0
56.2
13
0
957
31000.0
2.0
RENT
125000.0
Credit card refinancing
9.01
0
2007
0
20
0
17658.0
59.7
45
0
958
35000.0
0.0
MORTGAGE
0.0
Credit card refinancing
9999.00
0
1998
0
11
0
29543.0
61.5
37
0
960
36000.0
7.0
RENT
290000.0
Credit card refinancing
21.94
0
2001
0
16
0
103688.0
24.9
28
0
961
36000.0
7.0
RENT
132000.0
Credit card refinancing
7.09
0
2003
0
6
0
29814.0
80.1
17
0
962
35000.0
0.5
MORTGAGE
123000.0
Debt consolidation
25.46
7
1998
1
17
0
31317.0
70.2
35
0
963
33250.0
10.0
MORTGAGE
205000.0
Credit card refinancing
18.41
0
2006
0
20
0
57286.0
43.8
34
0
965
32000.0
1.0
RENT
145000.0
Debt consolidation
15.62
0
2003
0
32
0
35789.0
56.8
55
0
966
35000.0
6.0
RENT
160000.0
Debt consolidation
25.93
0
2001
0
21
0
29883.0
95.2
30
0
968
35000.0
6.0
MORTGAGE
256000.0
Credit card refinancing
22.68
0
2001
1
28
0
205654.0
30.4
68
0
969
40000.0
10.0
MORTGAGE
120000.0
Debt consolidation
24.16
4
1997
3
16
0
18794.0
35.3
48
0
970
36000.0
5.0
MORTGAGE
340000.0
Credit card refinancing
15.23
0
1994
0
14
0
187696.0
73.7
29
0
971
35000.0
0.5
OWN
60000.0
Debt consolidation
14.90
1
2007
1
12
0
12086.0
79.5
23
0
973
35000.0
10.0
MORTGAGE
166000.0
Debt consolidation
13.97
0
2001
1
13
0
41175.0
69.9
21
0
974
35000.0
1.0
MORTGAGE
72000.0
Credit card refinancing
17.33
0
2006
1
8
0
41664.0
56.7
15
0
975
35000.0
4.0
RENT
106000.0
Debt consolidation
28.81
0
2003
2
23
0
34879.0
73.4
29
0
976
37225.0
0.5
MORTGAGE
65000.0
Credit card refinancing
27.44
0
2000
0
12
0
33940.0
50.4
24
0
977
35000.0
0.5
MORTGAGE
49940.0
Debt consolidation
36.82
0
2001
1
7
1
14566.0
55.2
23
0
978
40000.0
6.0
MORTGAGE
135000.0
Debt consolidation
17.03
0
1987
0
18
0
26473.0
31.4
69
0
979
40000.0
3.0
MORTGAGE
96000.0
Debt consolidation
14.25
1
2000
0
11
1
24484.0
66.4
14
0
980
40000.0
10.0
OWN
103000.0
Debt consolidation
13.55
0
1991
1
6
0
8664.0
30.9
41
0
981
40000.0
6.0
RENT
136000.0
Debt consolidation
21.34
0
2003
1
19
0
2681.0
5.3
46
0
983
38625.0
4.0
MORTGAGE
177000.0
Debt consolidation
17.79
0
2001
0
12
2
16607.0
38.7
25
0
984
40000.0
0.5
MORTGAGE
35500.0
Debt consolidation
26.89
0
2011
0
24
0
20873.0
36.2
29
0
985
40000.0
5.0
MORTGAGE
95000.0
Debt consolidation
20.45
0
1999
3
7
0
39812.0
66.8
19
0
987
40000.0
5.0
RENT
509000.0
Debt consolidation
11.42
0
1995
1
14
0
23628.0
58.2
44
0
726 rows × 15 columns
In [11]:
# The `imp` module is deprecated and was removed in Python 3.12; importlib
# provides the same reload(). The alias keeps the name `imp` bound so that any
# later cell still calling imp.reload() keeps working.
import importlib as imp
# Re-import the local plotting helpers so edits to visuals.py are picked up.
imp.reload(vs)
# Plot the distributions of the untransformed features.
vs.distribution(newloans)
In [12]:
# importlib.reload replaces imp.reload: the `imp` module was removed in Python 3.12.
import importlib

# Features that get a log(x + 1) transform before scaling.
skewed = ['annual_inc','delinq_2yrs','open_acc', 'pub_rec','revol_bal','total_acc', 'collections_12_mths_ex_med']
# Work on a copy so `newloans` keeps the untransformed values.
newloans_raw=newloans.copy()
# Same element-wise log(x + 1) as before, applied to the whole sub-frame at once.
newloans_raw[skewed] = np.log(newloans[skewed] + 1)
# Re-import the local plotting helpers so edits to visuals.py are picked up.
importlib.reload(vs)
vs.distribution(newloans_raw, transformed = True)
In [13]:
from sklearn.preprocessing import MinMaxScaler

# Numeric feature columns that are rescaled to the [0, 1] range.
numerical = [
    'loan_amnt', 'emp_length', 'annual_inc', 'dti', 'delinq_2yrs',
    'earliest_cr_line', 'inq_last_6mths', 'open_acc', 'pub_rec',
    'revol_bal', 'revol_util', 'total_acc', 'collections_12_mths_ex_med',
]

# NOTE(review): the scaler is fitted on this batch of new loans, so the min/max
# come from the data being scored. If `clf` was trained on features scaled with
# a different fit, these values are not on the same scale as the training data --
# confirm, and reuse the scaler fitted at training time if so.
scaler = MinMaxScaler()
scaler.fit(newloans_raw[numerical])
newloans_raw[numerical] = scaler.transform(newloans_raw[numerical])

# Show the first records with scaling applied.
display(newloans_raw.head(10))
loan_amnt
emp_length
home_ownership
annual_inc
purpose
dti
delinq_2yrs
earliest_cr_line
inq_last_6mths
open_acc
pub_rec
revol_bal
revol_util
total_acc
collections_12_mths_ex_med
0
0.000000
0.70
MORTGAGE
0.865604
Credit card refinancing
0.002240
0.278943
0.857143
0.00
0.479785
0.000000
0.458143
0.311111
0.510820
0.0
1
0.184211
0.90
MORTGAGE
0.840911
Debt consolidation
0.001949
0.721057
0.530612
0.00
0.568839
0.000000
0.403813
0.140404
0.782011
0.0
3
0.152632
0.00
RENT
0.780023
Debt consolidation
0.002787
0.000000
0.693878
0.00
0.255958
0.000000
0.176139
0.176768
0.822013
0.0
5
0.289474
0.30
RENT
0.814677
Credit card refinancing
0.001549
0.000000
0.979592
0.00
0.255958
0.000000
0.567383
0.793939
0.235409
0.0
7
0.105263
0.05
RENT
0.782243
Debt consolidation
0.000940
0.442114
0.795918
0.00
0.362190
0.000000
0.386833
0.436364
0.491407
0.0
8
0.065132
0.90
RENT
0.793940
Credit card refinancing
0.001645
0.000000
0.979592
0.25
0.362190
0.000000
0.412418
0.428283
0.311194
0.0
9
0.190789
0.00
OWN
0.817906
Debt consolidation
0.002961
0.557886
0.571429
0.50
0.661642
0.000000
0.551587
0.332323
0.736661
0.0
10
0.060526
0.00
RENT
0.712719
Credit card refinancing
0.001338
0.000000
0.714286
0.25
0.405684
0.386853
0.406624
0.537374
0.622387
0.0
11
0.131579
0.00
RENT
0.767518
Debt consolidation
0.000868
0.000000
0.653061
0.00
0.255958
0.386853
0.479918
0.516162
0.311194
0.0
12
0.569737
1.00
MORTGAGE
1.000000
Credit card refinancing
0.002016
0.000000
0.653061
0.00
0.752158
0.000000
1.000000
0.481818
0.773413
0.0
In [14]:
# Rename the purpose labels so that one-hot encoding yields the columns
# purpose_credit_card / purpose_debt_consolidation.
for listing_label, feature_label in (
    ("Credit card refinancing", 'credit_card'),
    ("Debt consolidation", 'debt_consolidation'),
):
    newloans_raw.replace(listing_label, feature_label, inplace=True)
In [15]:
# One-hot encode the remaining categorical columns (home_ownership, purpose).
# NOTE(review): the dummy columns are derived from the categories present in
# this batch; presumably their set and order must match what `clf` was trained
# on -- confirm before relying on the predictions.
feat = pd.get_dummies(newloans_raw)
encoded = feat.columns.tolist()
# Report how many features result from the encoding, then list their names.
print ("{} total features after one-hot encoding.".format(len(encoded)))
print (encoded)
18 total features after one-hot encoding.
['loan_amnt', 'emp_length', 'annual_inc', 'dti', 'delinq_2yrs', 'earliest_cr_line', 'inq_last_6mths', 'open_acc', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc', 'collections_12_mths_ex_med', 'home_ownership_MORTGAGE', 'home_ownership_OWN', 'home_ownership_RENT', 'purpose_credit_card', 'purpose_debt_consolidation']
In [16]:
# Preview the scaled frame after the purpose labels were renamed.
newloans_raw.head(n=10)
Out[16]:
loan_amnt
emp_length
home_ownership
annual_inc
purpose
dti
delinq_2yrs
earliest_cr_line
inq_last_6mths
open_acc
pub_rec
revol_bal
revol_util
total_acc
collections_12_mths_ex_med
0
0.000000
0.70
MORTGAGE
0.865604
credit_card
0.002240
0.278943
0.857143
0.00
0.479785
0.000000
0.458143
0.311111
0.510820
0.0
1
0.184211
0.90
MORTGAGE
0.840911
debt_consolidation
0.001949
0.721057
0.530612
0.00
0.568839
0.000000
0.403813
0.140404
0.782011
0.0
3
0.152632
0.00
RENT
0.780023
debt_consolidation
0.002787
0.000000
0.693878
0.00
0.255958
0.000000
0.176139
0.176768
0.822013
0.0
5
0.289474
0.30
RENT
0.814677
credit_card
0.001549
0.000000
0.979592
0.00
0.255958
0.000000
0.567383
0.793939
0.235409
0.0
7
0.105263
0.05
RENT
0.782243
debt_consolidation
0.000940
0.442114
0.795918
0.00
0.362190
0.000000
0.386833
0.436364
0.491407
0.0
8
0.065132
0.90
RENT
0.793940
credit_card
0.001645
0.000000
0.979592
0.25
0.362190
0.000000
0.412418
0.428283
0.311194
0.0
9
0.190789
0.00
OWN
0.817906
debt_consolidation
0.002961
0.557886
0.571429
0.50
0.661642
0.000000
0.551587
0.332323
0.736661
0.0
10
0.060526
0.00
RENT
0.712719
credit_card
0.001338
0.000000
0.714286
0.25
0.405684
0.386853
0.406624
0.537374
0.622387
0.0
11
0.131579
0.00
RENT
0.767518
debt_consolidation
0.000868
0.000000
0.653061
0.00
0.255958
0.386853
0.479918
0.516162
0.311194
0.0
12
0.569737
1.00
MORTGAGE
1.000000
credit_card
0.002016
0.000000
0.653061
0.00
0.752158
0.000000
1.000000
0.481818
0.773413
0.0
In [17]:
# Score every encoded row and attach the predicted class to the readable table.
# The assignment is positional: `final` and `feat` both derive from the same
# filtered `newloans` rows, in the same order.
predicted_labels = clf.predict(feat)
final['predictedclass'] = predicted_labels
In [18]:
# Keep only the loans whose predicted class is 1.
goodloans = final.query(expr='predictedclass==1')
In [19]:
# Preview the first selected loans.
goodloans.head(n=10)
Out[19]:
id
int_rate
loan_amnt
installment
emp_length
home_ownership
annual_inc
purpose
dti
delinq_2yrs
earliest_cr_line
inq_last_6mths
open_acc
pub_rec
revol_bal
revol_util
total_acc
collections_12_mths_ex_med
predictedclass
0
111876788
7.21
2000.0
61.95
7 years
MORTGAGE
90000.0
Credit card refinancing
23.91
1
06-24-2007 17:00:00
0
10
0
5524.0
32.1
17
0
1
3
111549780
21.45
7800.0
295.68
n/a
RENT
29135.0
Debt consolidation
29.38
0
09-18-1999 17:00:00
0
5
0
675.0
18.8
44
0
1
9
112888880
13.59
9250.0
314.31
n/a
OWN
48000.0
Debt consolidation
31.12
3
12-07-1993 16:00:00
2
17
0
11082.0
34.2
34
0
1
10
112706451
15.05
4300.0
149.17
n/a
RENT
12000.0
Credit card refinancing
14.90
0
07-29-2000 17:00:00
1
8
1
3763.0
54.5
24
0
1
12
112050937
7.35
23650.0
734.04
10+ years
MORTGAGE
529000.0
Credit card refinancing
21.67
0
03-23-1997 16:00:00
0
22
0
312921.0
49.0
38
0
1
15
112762391
20.00
5000.0
185.82
5 years
RENT
92000.0
Debt consolidation
12.34
0
04-05-2003 16:00:00
0
4
1
447.0
49.7
7
0
1
18
113065122
16.02
8000.0
281.34
< 1 year
RENT
33000.0
Debt consolidation
11.56
0
01-08-1998 16:00:00
0
15
1
5079.0
22.9
31
0
1
26
112711315
15.05
24000.0
832.56
5 years
RENT
185000.0
Debt consolidation
8.63
0
11-30-2011 16:00:00
0
7
0
11773.0
21.8
7
0
1
32
113072246
13.59
10000.0
339.79
10+ years
MORTGAGE
55000.0
Debt consolidation
10.58
0
02-09-2013 16:00:00
1
9
0
4892.0
74.1
12
0
1
34
111863089
16.02
15000.0
527.51
< 1 year
RENT
50000.0
Credit card refinancing
27.53
0
03-27-2011 17:00:00
0
15
0
12938.0
55.8
21
0
1
In [20]:
# List the selected loans from the highest interest rate to the lowest.
goodloans.sort_values(by='int_rate', ascending=False)
Out[20]:
id
int_rate
loan_amnt
installment
emp_length
home_ownership
annual_inc
purpose
dti
delinq_2yrs
earliest_cr_line
inq_last_6mths
open_acc
pub_rec
revol_bal
revol_util
total_acc
collections_12_mths_ex_med
predictedclass
390
113088975
30.84
25825.0
1108.23
< 1 year
MORTGAGE
64000.0
Debt consolidation
32.55
1
07-09-1984 17:00:00
1
10
0
56948.0
33.6
18
0
1
481
111630307
30.84
26625.0
1142.56
10+ years
MORTGAGE
77000.0
Credit card refinancing
34.10
0
10-08-2001 17:00:00
1
17
0
25388.0
59.0
36
0
1
426
112809805
30.84
15000.0
643.70
10+ years
RENT
60000.0
Debt consolidation
24.54
0
05-09-2001 17:00:00
1
9
1
18630.0
77.3
22
0
1
746
113536597
29.69
12000.0
507.39
8 years
RENT
85000.0
Debt consolidation
21.84
0
05-16-2004 17:00:00
3
16
0
24183.0
43.3
21
0
1
870
112380974
26.30
22175.0
897.00
7 years
RENT
50400.0
Debt consolidation
16.86
0
12-11-2007 16:00:00
1
8
0
7904.0
51.0
17
0
1
751
113515708
26.30
12000.0
485.41
3 years
RENT
110000.0
Debt consolidation
24.85
1
07-15-1985 17:00:00
3
17
3
12304.0
33.6
24
0
1
977
113487906
25.82
35000.0
1406.82
< 1 year
MORTGAGE
49940.0
Debt consolidation
36.82
0
03-13-2001 16:00:00
1
7
1
14566.0
55.2
23
0
1
72
113228593
25.82
3000.0
120.59
< 1 year
RENT
48000.0
Debt consolidation
21.65
0
11-13-2011 16:00:00
1
5
0
4466.0
99.2
7
0
1
947
113154870
25.82
32075.0
1289.25
2 years
MORTGAGE
69757.0
Credit card refinancing
29.73
0
07-11-2002 17:00:00
3
44
1
47003.0
42.9
57
0
1
681
113511023
24.85
8500.0
337.29
9 years
MORTGAGE
125000.0
Debt consolidation
6.59
0
07-15-2004 17:00:00
1
14
3
8413.0
13.4
17
0
1
153
113090366
24.85
26300.0
1043.60
4 years
RENT
117000.0
Debt consolidation
10.14
1
09-09-2003 17:00:00
1
15
0
26438.0
31.0
19
0
1
207
112765108
24.85
13075.0
518.83
10+ years
RENT
90000.0
Debt consolidation
3.76
1
03-12-2001 16:00:00
0
3
0
2569.0
79.3
9
0
1
310
113123710
24.85
16000.0
634.89
10+ years
RENT
53000.0
Debt consolidation
26.25
1
10-10-2006 17:00:00
1
13
0
7509.0
23.8
17
0
1
760
112872994
23.88
12000.0
470.04
7 years
RENT
60000.0
Credit card refinancing
25.21
1
11-10-2001 16:00:00
2
18
1
11525.0
74.8
34
0
1
317
112913395
23.88
10000.0
391.70
< 1 year
RENT
36000.0
Debt consolidation
22.23
0
08-11-2012 17:00:00
1
4
0
10532.0
65.0
5
0
1
342
112915134
23.88
6950.0
272.24
3 years
RENT
40000.0
Debt consolidation
31.97
0
05-12-2003 17:00:00
1
12
0
8330.0
28.1
16
0
1
156
112989734
23.88
15000.0
587.55
5 years
MORTGAGE
49000.0
Debt consolidation
26.62
0
03-09-2002 16:00:00
1
9
5
9867.0
69.5
21
0
1
971
113162997
23.88
35000.0
1370.95
< 1 year
OWN
60000.0
Debt consolidation
14.90
1
07-11-2007 17:00:00
1
12
0
12086.0
79.5
23
0
1
217
113153544
21.45
4300.0
163.00
10+ years
RENT
40000.0
Debt consolidation
34.11
1
09-11-1994 17:00:00
1
10
0
637.0
37.5
17
0
1
3
111549780
21.45
7800.0
295.68
n/a
RENT
29135.0
Debt consolidation
29.38
0
09-18-1999 17:00:00
0
5
0
675.0
18.8
44
0
1
897
113162495
21.45
30000.0
1137.20
< 1 year
RENT
65000.0
Debt consolidation
19.79
0
10-11-1996 17:00:00
0
11
1
21443.0
69.3
21
0
1
929
112810710
21.45
31000.0
1175.11
10+ years
RENT
80000.0
Debt consolidation
13.59
0
03-10-1997 16:00:00
1
26
1
7517.0
10.0
41
0
1
344
113485860
21.45
12200.0
462.46
2 years
RENT
70000.0
Debt consolidation
14.92
0
03-13-2008 17:00:00
0
10
0
5930.0
61.8
30
0
1
801
113228081
21.45
15000.0
568.60
7 years
MORTGAGE
200000.0
Credit card refinancing
16.03
0
01-13-1997 16:00:00
4
32
0
35329.0
60.2
56
0
1
531
112929814
21.45
7800.0
295.68
2 years
MORTGAGE
62500.0
Debt consolidation
13.15
1
03-09-2004 16:00:00
0
9
0
5467.0
63.6
22
0
1
269
113295239
21.45
6050.0
229.34
2 years
MORTGAGE
66000.0
Debt consolidation
15.84
0
06-13-2003 17:00:00
4
11
0
8465.0
48.0
17
0
1
974
113233191
21.45
35000.0
1326.73
1 year
MORTGAGE
72000.0
Credit card refinancing
17.33
0
07-14-2006 17:00:00
1
8
0
41664.0
56.7
15
0
1
403
112861557
21.45
10000.0
379.07
8 years
RENT
35000.0
Credit card refinancing
30.11
0
08-11-2004 17:00:00
1
17
0
17566.0
98.1
40
0
1
370
113100122
21.45
6000.0
227.44
10+ years
MORTGAGE
74000.0
Credit card refinancing
22.67
0
10-10-1983 17:00:00
2
29
1
17793.0
35.6
50
0
1
333
113201786
21.45
6025.0
228.39
5 years
OWN
40000.0
Debt consolidation
16.05
0
01-13-1999 16:00:00
2
10
0
6727.0
26.9
12
0
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
501
113163909
7.07
15600.0
482.19
1 year
RENT
45000.0
Debt consolidation
28.13
0
12-12-1986 16:00:00
0
11
0
17892.0
34.3
25
0
1
731
113535113
7.07
10000.0
309.10
5 years
MORTGAGE
85000.0
Debt consolidation
21.40
0
05-16-1996 17:00:00
0
8
0
5937.0
24.2
60
0
1
890
113536120
7.07
20000.0
618.19
n/a
MORTGAGE
60000.0
Debt consolidation
18.58
0
01-16-1972 16:00:00
0
15
0
9723.0
18.4
32
0
1
170
113164862
7.07
4000.0
123.64
2 years
RENT
49000.0
Credit card refinancing
27.95
0
07-12-2009 17:00:00
1
9
0
3930.0
10.1
9
0
1
983
113513208
7.07
38625.0
1193.87
4 years
MORTGAGE
177000.0
Debt consolidation
17.79
0
07-15-2001 17:00:00
0
12
2
16607.0
38.7
25
0
1
293
113161845
7.07
8000.0
247.28
2 years
RENT
50000.0
Debt consolidation
25.64
0
04-11-2010 17:00:00
1
8
0
10496.0
44.7
15
0
1
485
113193460
5.32
6025.0
181.45
2 years
RENT
58000.0
Debt consolidation
16.76
0
11-12-2001 16:00:00
1
14
0
620.0
2.0
26
0
1
495
113495327
5.32
5000.0
150.58
3 years
RENT
53000.0
Credit card refinancing
30.10
0
03-14-2007 17:00:00
0
16
0
10496.0
49.4
23
0
1
849
113306630
5.32
20000.0
602.30
6 years
MORTGAGE
100000.0
Debt consolidation
13.12
0
07-16-1998 17:00:00
0
9
0
32418.0
27.1
16
0
1
366
113127922
5.32
19000.0
572.19
10+ years
MORTGAGE
103000.0
Credit card refinancing
18.77
0
02-11-1997 16:00:00
0
17
0
19207.0
27.2
36
0
1
334
112800905
5.32
21000.0
632.42
4 years
MORTGAGE
160000.0
Credit card refinancing
11.84
0
03-11-2004 16:00:00
0
12
0
17336.0
48.0
22
0
1
256
113165853
5.32
7000.0
210.81
6 years
MORTGAGE
56000.0
Debt consolidation
20.20
0
12-12-2003 16:00:00
1
8
0
2811.0
17.2
26
0
1
219
112931402
5.32
15000.0
451.73
10+ years
MORTGAGE
125000.0
Credit card refinancing
6.08
0
08-11-1999 17:00:00
0
11
0
15588.0
23.7
24
0
1
361
113180686
5.32
15000.0
451.73
8 years
RENT
108000.0
Debt consolidation
13.95
0
09-12-1994 17:00:00
0
8
0
13082.0
26.9
16
0
1
600
113515496
5.32
5000.0
150.58
3 years
MORTGAGE
68000.0
Debt consolidation
20.63
3
04-15-1999 17:00:00
0
24
0
11515.0
27.4
34
0
1
513
112695870
5.32
13600.0
409.57
4 years
RENT
75000.0
Credit card refinancing
23.49
0
11-13-2003 16:00:00
0
6
0
17773.0
51.5
17
0
1
540
113456817
5.32
2500.0
75.29
10+ years
MORTGAGE
71000.0
Debt consolidation
16.04
0
03-16-2002 16:00:00
0
12
0
31653.0
57.4
29
0
1
925
113163593
5.32
26000.0
782.99
10+ years
MORTGAGE
58240.0
Debt consolidation
27.39
0
09-11-2001 17:00:00
1
15
0
28579.0
51.6
29
0
1
695
113543293
5.32
8000.0
240.92
< 1 year
RENT
80000.0
Debt consolidation
20.40
0
10-16-2007 17:00:00
1
13
0
11127.0
34.4
21
0
1
716
113484518
5.32
13000.0
391.50
< 1 year
RENT
160000.0
Debt consolidation
18.53
0
02-13-1992 16:00:00
0
15
0
40833.0
57.7
21
0
1
905
113194718
5.32
25500.0
767.93
4 years
MORTGAGE
159500.0
Debt consolidation
19.22
0
07-12-1998 17:00:00
1
21
0
8493.0
6.9
41
0
1
738
113491830
5.32
10000.0
301.15
10+ years
MORTGAGE
75000.0
Debt consolidation
22.59
0
05-14-1993 17:00:00
0
14
0
26765.0
58.6
27
0
1
742
113445886
5.32
10000.0
301.15
10+ years
MORTGAGE
45000.0
Debt consolidation
18.64
0
08-14-2006 17:00:00
0
14
0
11761.0
30.6
20
0
1
802
113501718
5.32
13500.0
406.55
< 1 year
MORTGAGE
74000.0
Credit card refinancing
17.63
0
12-15-2002 16:00:00
0
8
0
15082.0
52.9
19
0
1
820
113491192
5.32
15000.0
451.73
10+ years
MORTGAGE
100000.0
Debt consolidation
21.00
0
03-13-1988 16:00:00
0
11
0
38174.0
57.1
32
0
1
887
113493462
5.32
20000.0
602.30
8 years
MORTGAGE
103000.0
Credit card refinancing
22.16
0
06-13-1980 17:00:00
1
6
0
16927.0
45.7
21
0
1
837
113494553
5.32
18000.0
542.07
1 year
OWN
78000.0
Debt consolidation
27.51
0
03-14-1993 16:00:00
1
16
0
17042.0
29.0
30
0
1
881
113503422
5.32
20000.0
602.30
10+ years
MORTGAGE
150000.0
Credit card refinancing
18.89
0
09-15-2000 17:00:00
0
14
0
17218.0
19.8
24
0
1
840
113191183
5.32
20000.0
602.30
2 years
RENT
140000.0
Credit card refinancing
23.12
0
10-12-2003 17:00:00
1
8
0
18891.0
24.6
37
0
1
536
113510922
5.32
4000.0
120.46
2 years
MORTGAGE
45000.0
Credit card refinancing
11.76
0
10-15-1986 17:00:00
2
14
0
6681.0
20.2
24
0
1
355 rows × 19 columns
In [ ]:
In [ ]:
Content source: mccormd1/LCandR
Similar notebooks: